Regular Season vs. Postseason Overall Statistics
# Season-by-season comparison of average per-game statistics (home and
# visiting team combined) in regular-season vs. postseason games, 1910 onward.
# The result is rendered as a gt table and written to "overallstats.png".
baseball %>%
  mutate(year = as.numeric(year)) %>%
  # Discard seasons before 1910 up front so the per-game arithmetic below only
  # runs on rows that end up in the table.
  filter(year >= 1910) %>%
  mutate(
    postseason = fct_recode(
      factor(postseason),
      "Postseason" = "1",
      "Regular Season" = "0"
    ),
    # Per-game totals: home and visiting team figures added together.
    score = HomeScore + VisitScore,
    AB = HomeAB + VisitAB,
    H = HomeH + VisitH,
    HR = HomeHR + VisitHR,
    BB = HomeBB + VisitBB,
    K = HomeK + VisitK,
    SB = HomeSB + VisitSB,
    LOB = HomeLOB + VisitLOB,
    E = HomeE + VisitE
  ) %>%
  group_by(year, postseason) %>%
  # na.rm = TRUE matches the per-team table in this file: one game with a
  # missing value must not blank out a whole season's average.
  # .groups = "drop" makes the ungrouping explicit and silences the
  # "grouped output" message.
  summarise(
    across(
      c(score, AB, H, HR, BB, K, SB, LOB, E),
      ~ mean(.x, na.rm = TRUE),
      .names = "{.col}avg"
    ),
    .groups = "drop"
  ) %>%
  # One row per year, with a <stat>avg_<game type> column for each pairing.
  pivot_wider(
    names_from = "postseason",
    values_from = c(
      scoreavg, ABavg, Havg, HRavg, BBavg, Kavg, SBavg, LOBavg, Eavg
    )
  ) %>%
  # data.frame() makes the names syntactic, so "..._Regular Season" becomes
  # "..._Regular.Season"; every column reference below relies on that.
  data.frame() %>%
  gt(rowname_col = "year") %>%
  # One spanner per statistic over its regular-season / postseason pair.
  tab_spanner(
    label = "Runs",
    columns = c(scoreavg_Regular.Season, scoreavg_Postseason)
  ) %>%
  tab_spanner(
    label = "At Bats",
    columns = c(ABavg_Regular.Season, ABavg_Postseason)
  ) %>%
  tab_spanner(
    label = "Hits",
    columns = c(Havg_Regular.Season, Havg_Postseason)
  ) %>%
  tab_spanner(
    label = "Home Runs",
    columns = c(HRavg_Regular.Season, HRavg_Postseason)
  ) %>%
  tab_spanner(
    label = "Walks",
    columns = c(BBavg_Regular.Season, BBavg_Postseason)
  ) %>%
  tab_spanner(
    label = "Strikeouts",
    columns = c(Kavg_Regular.Season, Kavg_Postseason)
  ) %>%
  tab_spanner(
    label = "Stolen Bases",
    columns = c(SBavg_Regular.Season, SBavg_Postseason)
  ) %>%
  tab_spanner(
    label = "Runners Left on Base",
    columns = c(LOBavg_Regular.Season, LOBavg_Postseason)
  ) %>%
  tab_spanner(
    label = "Errors",
    columns = c(Eavg_Regular.Season, Eavg_Postseason)
  ) %>%
  cols_label(
    scoreavg_Regular.Season = "Regular Season",
    scoreavg_Postseason = "Postseason",
    ABavg_Regular.Season = "Regular Season",
    ABavg_Postseason = "Postseason",
    Havg_Regular.Season = "Regular Season",
    Havg_Postseason = "Postseason",
    HRavg_Regular.Season = "Regular Season",
    HRavg_Postseason = "Postseason",
    BBavg_Regular.Season = "Regular Season",
    BBavg_Postseason = "Postseason",
    Kavg_Regular.Season = "Regular Season",
    Kavg_Postseason = "Postseason",
    SBavg_Regular.Season = "Regular Season",
    SBavg_Postseason = "Postseason",
    LOBavg_Regular.Season = "Regular Season",
    LOBavg_Postseason = "Postseason",
    Eavg_Regular.Season = "Regular Season",
    Eavg_Postseason = "Postseason"
  ) %>%
  tab_stubhead("Year") %>%
  tab_header(
    title = "Comparison of Game Statistics in Regular Season vs. Postseason MLB Games"
  ) %>%
  tab_source_note(
    source_note = "Missing values indicate seasons with an MLB lockout"
  ) %>%
  # Target the statistic columns by name pattern rather than by position
  # (1:19), so the `year` stub column is never number-formatted.
  # NOTE(review): recent gt releases deprecate fmt_missing() in favour of
  # sub_missing(); switch once the installed gt version is confirmed.
  fmt_missing(
    columns = contains("avg_"),
    missing_text = "---"
  ) %>%
  fmt_number(
    columns = contains("avg_"),
    decimals = 2
  ) %>%
  # Shade each statistic column from white (low) to light blue (high).
  # domain = NULL lets every column be scaled on its own range, so a single
  # call covers all columns.
  data_color(
    columns = contains("avg_"),
    colors = scales::col_numeric(
      palette = c("white", "lightblue"),
      domain = NULL
    )
  ) %>%
  gtsave(filename = "overallstats.png")
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

Overall Team Statistics in Regular Season vs. Postseason
# Visiting-team view of each game: keep every Visit* column plus the game
# descriptors, then strip the "Visit" prefix from the listed per-team fields
# so they share names with the home-team table. Visit* columns that are not
# listed keep their original names.
away_baseball <- baseball %>%
  select(starts_with("Visit"), postseason, year, Day) %>%
  rename_with(
    ~ sub("^Visit", "", .x),
    all_of(paste0(
      "Visit",
      c(
        "Team", "League", "GameNum", "AB", "H", "DBL", "TRI", "HR", "RBI",
        "SH", "SF", "HBP", "BB", "IBB", "K", "SB", "CS", "DP", "CI", "LOB",
        "NumPitchers", "IER", "TeamER", "WP", "BK", "PO", "A", "E", "PB",
        "DPs", "TPs"
      )
    ))
  )
# Home-team view of each game: keep every Home* column plus the game
# descriptors, then strip the "Home" prefix from the listed per-team fields
# so they share names with the visiting-team table. Home* columns that are
# not listed keep their original names.
home_baseball <- baseball %>%
  select(starts_with("Home"), postseason, year, Day) %>%
  rename_with(
    ~ sub("^Home", "", .x),
    all_of(paste0(
      "Home",
      c(
        "Team", "League", "GameNum", "AB", "H", "DBL", "TRI", "HR", "RBI",
        "SH", "SF", "HBP", "BB", "IBB", "K", "SB", "CS", "DP", "CI", "LOB",
        "NumPitchers", "IER", "TeamER", "WP", "BK", "PO", "A", "E", "PB",
        "DPs", "TPs"
      )
    ))
  )
# Display name -> team code for the 30 franchises kept in the team table.
# This one vector drives both the row filter and the factor recode below, so
# the two can no longer drift apart.
mlb_team_names <- c(
  "Los Angeles Angels" = "ANA",
  "Baltimore Orioles" = "BAL",
  "Boston Red Sox" = "BOS",
  "Chicago White Sox" = "CHA",
  "Cleveland Indians" = "CLE",
  "Detroit Tigers" = "DET",
  "Houston Astros" = "HOU",
  "Kansas City Royals" = "KCA",
  "Minnesota Twins" = "MIN",
  "New York Yankees" = "NYA",
  "Oakland Athletics" = "OAK",
  "Seattle Mariners" = "SEA",
  "Tampa Bay Rays" = "TBA",
  "Texas Rangers" = "TEX",
  "Toronto Blue Jays" = "TOR",
  "Arizona Diamondbacks" = "ARI",
  "Atlanta Braves" = "ATL",
  "Chicago Cubs" = "CHN",
  "Cincinnati Reds" = "CIN",
  "Colorado Rockies" = "COL",
  "Los Angeles Dodgers" = "LAN",
  "San Diego Padres" = "SDN",
  "Miami Marlins" = "MIA",
  "Milwaukee Brewers" = "MIL",
  "New York Mets" = "NYN",
  "Philadelphia Phillies" = "PHI",
  "Pittsburgh Pirates" = "PIT",
  "San Francisco Giants" = "SFN",
  "St. Louis Cardinals" = "SLN",
  "Washington Nationals" = "WAS"
)

# Per-team averages of single-team game statistics, regular season vs.
# postseason, 1910 onward. Visiting and home rows are stacked so each game
# contributes one row per team. The result is rendered as a gt table and
# written to "teampoststats.png".
bind_rows(away_baseball, home_baseball) %>%
  filter(year >= 1910 & Team %in% mlb_team_names) %>%
  mutate(
    # Replace team codes with display names, then order levels alphabetically.
    Team = fct_recode(factor(Team), !!!mlb_team_names),
    Team = fct_relevel(Team, sort),
    postseason = fct_recode(
      factor(postseason),
      "Postseason" = "1",
      "Regular Season" = "0"
    )
  ) %>%
  group_by(Team, postseason) %>%
  # .groups = "drop_last" is what summarise() already did implicitly; stating
  # it silences the message. The result must stay grouped by Team because
  # gt() turns the grouping of a grouped data frame into row groups.
  summarise(
    across(
      c(AB, H, HR, BB, K, SB, LOB, E),
      ~ mean(.x, na.rm = TRUE),
      .names = "{.col}avg"
    ),
    .groups = "drop_last"
  ) %>%
  rename("Game Type" = postseason) %>%
  arrange(Team) %>%
  gt() %>%
  cols_label(
    ABavg = "At Bats",
    Havg = "Hits",
    HRavg = "Home Runs",
    BBavg = "Walks",
    Kavg = "Strikeouts",
    SBavg = "Stolen Bases",
    LOBavg = "Left on Base",
    Eavg = "Errors"
  ) %>%
  # ends_with("avg") picks exactly the eight statistic columns.
  fmt_number(
    columns = ends_with("avg"),
    decimals = 2
  ) %>%
  # NOTE(review): gt() is called without rowname_col, so there appears to be
  # no stub for this label to attach to. Confirm whether "Game Type" was meant
  # to be the stub column.
  tab_stubhead("Game Type") %>%
  tab_header(
    title = "Regular Season vs. Postseason Game Statistics by MLB Team"
  ) %>%
  # Shade each statistic column from white (low) to light blue (high);
  # domain = NULL scales every column on its own range.
  data_color(
    columns = ends_with("avg"),
    colors = scales::col_numeric(
      palette = c("white", "lightblue"),
      domain = NULL
    )
  ) %>%
  cols_align(
    align = "center",
    columns = ends_with("avg")
  ) %>%
  gtsave(filename = "teampoststats.png")
## `summarise()` has grouped output by 'Team'. You can override using the
## `.groups` argument.
